rm(list = ls())
source("matrices.R")
source("agents.R")
library(parallel)
library(stats)
library(knitr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(rrrsa)
knitr::opts_chunk$set(cache=TRUE,
warning=FALSE, message=FALSE)
Helper functions for data pre-processing and combining (from sims.R).
#####
##### normalizeBy()
##### -------------
##### normalize a matrix by rows or cols, maintaining naming
#####
normalizeBy <- function(m, rows = TRUE) {
colNames <- colnames(m)
rowNames <- rownames(m)
normedM <- apply(m, MARGIN = ifelse(rows, 1, 2), rrrsa::rsa.normVec)
## apply() over rows returns the transpose, so flip it back before restoring dimnames
if (rows) normedM <- t(normedM)
colnames(normedM) <- colNames
rownames(normedM) <- rowNames
normedM
}
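A toy illustration of normalizeBy() (made-up 2 x 2 matrix; assumes rrrsa::rsa.normVec simply rescales a vector to sum to 1): column-normalizing keeps the dimnames and makes each column sum to 1.
## Toy example only -- not part of the analysis
toyM <- matrix(c(1, 3, 2, 2), nrow = 2,
               dimnames = list(c("o1", "o2"), c("q1", "q2")))
normalizeBy(toyM, rows = FALSE) ## columns q1 and q2 each sum to 1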
#####
##### inGlobalEnv()
##### -------------
##### TRUE if an object with this name is present in the global env
#####
inGlobalEnv <- function(item) {
item %in% ls(envir = .GlobalEnv)
}
#####
##### getLiteralSemantics()
##### ---------------------
##### literal semantics mapper between
##### normalized matrices and dataframe
##### called in mutate() within dplyr chain
#####
getLiteralSemantics <- function(matrixName, object, query) {
if (!(inGlobalEnv("simpleM") & inGlobalEnv("complexM") & inGlobalEnv("oddmanM") & inGlobalEnv("twinsM"))) {
stop("Missing normalized matrices, please run data pre-processing block")
}
if (matrixName == "simple") {
simpleM[as.character(object), as.character(query)]
} else if (matrixName == "complex") {
complexM[as.character(object), as.character(query)]
} else if (matrixName == "oddman") {
oddmanM[as.character(object), as.character(query)]
} else if (matrixName == "twins") {
twinsM[as.character(object), as.character(query)]
} else {
stop("Error in getLiteralSemantics()")
}
}
#####
##### completeQueries()
##### ---------------------
##### add "query" words for each expt
##### this is necessary for running with rrrsa::rsa.runDf
#####
completeQueries <- function(d) {
## store data
newD <- d
currMatrix <- unique(as.character(d$matrix))
## meta-data
allQueries <- c("glasses", "hat", "mustache")
allObjects <- c("foil", "logical", "target")
## The current query
currQuery <- unique(as.character(d$query))
neededQueries <- setdiff(allQueries, currQuery)
## Add in new queries
for (q in neededQueries) {
df <- data.frame(cond = as.vector(d$cond),
expt = as.vector(d$expt),
matrix = as.vector(d$matrix),
prior = as.vector(d$prior),
query = rep(q, 3),
object = as.vector(d$object),
count = rep(NA, 3),
p = rep(NA, 3),
n = as.vector(d$n),
cil = rep(NA, 3),
cih = rep(NA, 3),
priorType = as.vector(d$priorType),
priorValue = as.vector(d$priorValue),
grouper = as.vector(d$grouper))
newD <- plyr::rbind.fill(newD, df)
if (currMatrix == "simple") return(newD)
}
newD
}
# undebug(completeQueries)
# debug(completeQueries)
# completeQueries(baserate_63)
#####
##### convertListToDF()
##### -----------------
##### Convert a list where each element represents
##### data for a different grouping variable
##### useful with simulations run below
#####
convertListToDF <- function(li, rows = TRUE) {
df <- data.frame()
nItems <- seq(1, length(li))
if (rows) {
for (i in nItems) {
df <- rbind(df, as.data.frame(li[[i]]))
}
} else {
df <- data.frame(matrix(nrow = length(li[[1]]), ncol = 1))
for (i in nItems) {
df <- cbind(df, as.data.frame(li[[i]]))
}
return(df[, -1])
}
df
}
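A quick toy usage of convertListToDF() (illustrative inputs only): row-bind a list of data frames, or column-bind a list of equal-length vectors.
## Toy examples only -- not part of the analysis
convertListToDF(list(data.frame(x = 1), data.frame(x = 2)))   ## stacks rows: 2 x 1
convertListToDF(list(c(1, 2, 3), c(4, 5, 6)), rows = FALSE)   ## binds columns: 3 x 2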
#####
##### rrrsa_ready_df()
##### -------------
##### process data for use with rrrsa
##### NOTE :: this is not a generic fn
##### and makes certain assumptions about column naming
##### (e.g. colnames -> object, expt, n, matrix)
#####
rrrsa_ready_df <- function(d, foil_name = "foil", logical_name = "logical", target_name = "target") {
## Initial pre-processing before adding in new cols and then adding literal semantics
processedD <- d %>%
## Gather priors - we're only interested in the prior for the current object (use the
## standard-evaluation gather_() so this stays compatible with the bootstrapped prior column names)
gather_("priorType", "priorValue", c(foil_name, logical_name, target_name))
processedD <- processedD %>%
## Filter out any mismatches between current object and priorType
## Do this rowwise with grepl() so that it also works with the bootstrap naming conventions
rowwise %>%
filter(grepl(object, priorType)) %>%
rowwise %>%
## Grouping variable concatenates experiment type and sample size (expt-n)
mutate(grouper = paste0(as.character(expt), "-", as.character(n)))
fullData <- plyr::ddply(processedD, .variables = ("grouper"), .fun = completeQueries) %>%
rowwise %>%
## Each row gets a unique literal semantics from the getLiteralSemantics() helper
mutate(speaker.p = getLiteralSemantics(matrix, query = query, object = object)) %>%
arrange(expt, n)
fullData
}
set.seed(1)
ALPHAS <- seq(0, 5, by = 0.1)
DEPTHS <- seq(0, 5)
## This is `d` df from line 128 in sims.Rmd
d <- read.csv("data/models.csv")
## Renamed, normalized matrices from matrices.R
simpleM <- normalizeBy(simple, rows = FALSE)
complexM <- normalizeBy(complex, rows = FALSE)
twinsM <- normalizeBy(twins, rows = FALSE)
oddmanM <- normalizeBy(oddman, rows = FALSE)
NOTE :: To work with rrrsa::rsa.runDf we need each matrix to include the literal semantics of all the queries (even if they weren’t actually queried). Look at group ‘baserate-63’: only ‘glasses’ is queried, but we need to add the literal semantics for ‘hat’ and ‘mustache’ as well.
fullData <- rrrsa_ready_df(d)
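As a quick, illustrative sanity check (using only columns created above), we can list the query words now attached to the ‘baserate-63’ group:
## Sanity check: the group should now include more than just the originally-queried 'glasses'
unique(as.character(fullData$query[fullData$grouper == "baserate-63"]))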
Helper if we want to avoid using rsa.tuneDepthAlpha()
## tune_depth_alpha()
## ------------------
## custom tuning if we don't want to use rsa.tuneDepthAlpha()
tune_depth_alpha <- function(d,
depths,
alphas,
usePrior,
usePriorEveryRecurse = FALSE) { ## default added so the no-prior call below can omit this argument
res <- data.frame(r=c(), alpha=c(), depth=c())
for (depth in depths) {
for (alpha in alphas) {
## Prior
if (usePrior) {
curr_out <- d %>%
split(.$grouper) %>%
purrr::map_df(~rsa.runDf(data=.x,
quantityVarName="object",
semanticsVarName="speaker.p",
itemVarName="query",
priorsVarName="priorValue",
alpha=alpha,
depth=depth,
usePriorEveryRecurse=usePriorEveryRecurse)) %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth,
alpha=alpha) %>%
filter(tuningData == "keep")
}
## No prior
else {
curr_out <- d %>%
split(.$grouper) %>%
purrr::map_df(~rsa.runDf(data=.x,
quantityVarName="object",
semanticsVarName="speaker.p",
itemVarName="query",
alpha=alpha,
depth=depth,
usePriorEveryRecurse=usePriorEveryRecurse)) %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth,
alpha=alpha) %>%
filter(tuningData == "keep")
}
res <- rbind(res, data.frame(r=cor(curr_out$p, curr_out$preds), alpha=alpha, depth=depth))
}
}
res
}
Testing custom tune_depth_alpha()
tune1 <- tune_depth_alpha(fullData, DEPTHS, ALPHAS, FALSE)
tune1 %>%
summarise(max_r=max(r),
max_alpha=alpha[which.max(r)],
max_depth=depth[which.max(r)])
“Private knowledge” priors are multiplied in only at the bottom level of the recursion.
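To make the distinction concrete, here is a toy sketch of the two prior regimes as I read the prose above; this is not rrrsa's implementation (rsa_sketch and its arguments are made-up names, and rrrsa's actual recursion may differ in its details):
## Toy sketch only -- not rrrsa::rsa.runDf.
## `sem` is an objects x queries semantics matrix; `prior` is a probability vector over objects.
rsa_sketch <- function(sem, prior, depth = 1, alpha = 1, prior_every_recurse = FALSE) {
  norm_cols <- function(m) sweep(m, 2, colSums(m), "/")  ## each query column sums to 1
  norm_rows <- function(m) sweep(m, 1, rowSums(m), "/")  ## each object row sums to 1
  listener <- norm_cols(sem * prior)                     ## bottom listener: prior multiplied in once
  for (i in seq_len(depth)) {
    speaker <- norm_rows(listener ^ alpha)               ## speaker softmax over queries
    listener <- if (prior_every_recurse) {
      norm_cols(speaker * prior)                         ## "common knowledge": prior re-enters every level
    } else {
      norm_cols(speaker)                                 ## "private knowledge": prior only at the bottom
    }
  }
  listener
}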
# sims_prior <- tune_depth_alpha(fullData, depths=DEPTHS, alphas=1, usePrior=TRUE, usePriorEveryRecurse=FALSE)
## Pre-sort by grouper
fullData <- fullData %>% arrange(grouper)
sims_prior_temp_pk <- lapply(DEPTHS, function(depth) {
plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
itemVarName = "query", priorsVarName = "priorValue", depth = depth, usePriorEveryRecurse = FALSE) %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth) %>%
filter(tuningData == "keep")
})
sims_prior_pk <- convertListToDF(sims_prior_temp_pk)
## written with purrr
##
# sims_prior_temp2 <- lapply(DEPTHS, function(depth) {
# # plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
# # quantityVarName = "object", semanticsVarName = "speaker.p",
# # itemVarName = "query", priorsVarName = "priorValue", depth = depth, usePriorEveryRecurse = FALSE) %>%
# fullData %>%
# split(.$grouper) %>%
# purrr::map_df(~rsa.runDf(data=.x,
# quantityVarName="object",
# semanticsVarName="speaker.p",
# itemVarName="query",
# priorsVarName="priorValue",
# depth=depth,
# usePriorEveryRecurse=FALSE)) %>%
# rowwise %>%
# mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
# depth = depth) %>%
# filter(tuningData == "keep")
# })
# sims_prior2 <- convertListToDF(sims_prior_temp2)
sims_prior_pk %>%
filter(depth < 3) %>%
ggplot(., aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("Private knowledge priors model sims, alpha = 1") + theme_bw()
ggplot(sims_prior_pk, aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("Private knowledge priors model sims, alpha = 1") + theme_bw()
print("Private knowledge priors models, alpha held at 1, varied depth")
## [1] "Private knowledge priors models, alpha held at 1, varied depth"
cors_pk <- sims_prior_pk %>%
group_by(depth) %>%
summarise("cor" = cor(p, preds)) %>%
mutate("alpha" = 1)
cors_pk %>%
kable()
| depth | cor | alpha |
|---|---|---|
| 0 | 0.7357369 | 1 |
| 1 | 0.8581523 | 1 |
| 2 | 0.9281867 | 1 |
| 3 | 0.9472652 | 1 |
| 4 | 0.9523711 | 1 |
| 5 | 0.9528916 | 1 |
## Need to identify indices we want for performance comparisons
fullData <- fullData %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices <- which(fullData$tuningData == "keep")
## Parallelize fitting
priorFit_pk <- mclapply(DEPTHS, function(i) { ## parallelize over depths
rsa.tuneDepthAlpha(fullData, quantityVarName = "object",
semanticsVarName = "speaker.p", itemVarName = "query",
priorsVarName = "priorValue", groupName = "grouper",
compareDataName = "p", compareIndices = compareIndices,
depth = i, alphas = ALPHAS, usePriorEveryRecurse = FALSE)
},
mc.cores = detectCores())
aggregateTuneData_pk <- convertListToDF(priorFit_pk)
## Get best model performance by alpha, depth
maxCors_pk <- aggregateTuneData_pk %>%
group_by(depth) %>%
summarize(maxCor = max(cor),
alpha = alpha[which.max(cor)])
aggregateTuneData_plot_pk <- aggregateTuneData_pk %>%
mutate(Recursive_depth = as.factor(depth))
## Plot tuning (Plot adapted for personal_site)
ggplot(aggregateTuneData_plot_pk, aes(x = alpha, y = cor, col=Recursive_depth)) +
geom_point(alpha=0.6, size=4) +
geom_vline(xintercept=maxCors_pk$alpha, col="grey", lty="dotdash")+
ylab("Correlation") +
scale_x_continuous(breaks = round(seq(0, 6, by = 0.1)), "Rationality (alpha level)") +
# theme_solarized() +
theme_bw() +
theme(legend.position="bottom", legend.box="horizontal",
legend.text=element_text(size=8), legend.direction="horizontal")
depths_param_data_pk <- maxCors_pk %>%
arrange(depth)
best_prior_models_fitted_alpha_pk <- convertListToDF(lapply(DEPTHS, function(depth) {
plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
priors = "priorValue", usePriorEveryRecurse = FALSE,
itemVarName = "query", depth = depths_param_data_pk$depth[depth + 1],
alpha = depths_param_data_pk$alpha[depth + 1]) %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth) %>%
filter(tuningData == "keep")
}))
## Quick check
## cor(best_prior_models_fitted_alpha[best_prior_models_fitted_alpha$depth == 1, ]$p, best_prior_models_fitted_alpha[best_prior_models_fitted_alpha$depth == 1, ]$preds)
print("Private knowledge priors model performance for recursive depth 0-5 with fitted alphas")
## [1] "Private knowledge priors model performance for recursive depth 0-5 with fitted alphas"
maxCors_pk %>% kable() ## performance data
| depth | maxCor | alpha |
|---|---|---|
| 0 | 0.7357369 | 0.0 |
| 1 | 0.9453030 | 3.1 |
| 2 | 0.9522229 | 1.4 |
| 3 | 0.9527394 | 1.1 |
| 4 | 0.9523711 | 1.0 |
| 5 | 0.9528916 | 1.0 |
## plots
ggplot(best_prior_models_fitted_alpha_pk, aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth, labeller = label_both) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("Private knowledge priors model performance for recursive depth 0-5 with fitted alphas") + theme_bw()
“Common knowledge” priors are included at every level of recursion.
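In terms of the toy rsa_sketch() above, this regime corresponds to prior_every_recurse = TRUE; in the rrrsa calls below it corresponds to usePriorEveryRecurse = TRUE.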
## Pre-sort by grouper
fullData <- fullData %>% arrange(grouper)
sims_prior_temp_ck <- lapply(DEPTHS, function(depth) {
plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
itemVarName = "query", priorsVarName = "priorValue", depth = depth, usePriorEveryRecurse = TRUE) %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth) %>%
filter(tuningData == "keep")
})
sims_prior_ck <- convertListToDF(sims_prior_temp_ck)
sims_prior_ck %>%
filter(depth < 3) %>%
ggplot(., aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("Common knowledge priors model sims, alpha = 1") + theme_bw()
ggplot(sims_prior_ck, aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("Common knowledge priors model sims, alpha = 1") + theme_bw()
print("Common knowledge priors models, alpha held at 1, varied depth")
## [1] "Common knowledge priors models, alpha held at 1, varied depth"
cors_ck <- sims_prior_ck %>%
group_by(depth) %>%
summarise("cor" = cor(p, preds)) %>%
mutate("alpha" = 1)
cors_ck %>%
kable()
| depth | cor | alpha |
|---|---|---|
| 0 | 0.7357369 | 1 |
| 1 | 0.8581523 | 1 |
| 2 | 0.8687076 | 1 |
| 3 | 0.8720136 | 1 |
| 4 | 0.8726817 | 1 |
| 5 | 0.8723590 | 1 |
## Need to identify indices we want for performance comparisons
fullData <- fullData %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices <- which(fullData$tuningData == "keep")
## Parallelize fitting
priorFit_ck <- mclapply(DEPTHS, function(i) { ## parallelize over depths
rsa.tuneDepthAlpha(fullData, quantityVarName = "object",
semanticsVarName = "speaker.p", itemVarName = "query",
priorsVarName = "priorValue", groupName = "grouper",
compareDataName = "p", compareIndices = compareIndices,
depth = i, alphas = ALPHAS, usePriorEveryRecurse = TRUE)
},
mc.cores = detectCores())
aggregateTuneData_ck <- convertListToDF(priorFit_ck)
## Get best model performance by alpha, depth
maxCors_ck <- aggregateTuneData_ck %>%
group_by(depth) %>%
summarize(maxCor = max(cor),
alpha = alpha[which.max(cor)])
aggregateTuneData_plot_ck <- aggregateTuneData_ck %>%
mutate(Recursive_depth = as.factor(depth))
## Plot tuning (Plot adapted for personal_site)
ggplot(aggregateTuneData_plot_ck, aes(x = alpha, y = cor, col=Recursive_depth)) +
geom_point(alpha=0.6, size=4) +
geom_vline(xintercept=maxCors_ck$alpha, col="grey", lty="dotdash")+
ylab("Correlation") +
scale_x_continuous(breaks = round(seq(0, 6, by = 0.1)), "Rationality (alpha level)") +
# theme_solarized() +
theme_bw() +
theme(legend.position="bottom", legend.box="horizontal",
legend.text=element_text(size=8), legend.direction="horizontal")
depths_param_data_ck <- maxCors_ck %>%
arrange(depth)
best_ck_prior_models_fitted_alpha <- convertListToDF(lapply(DEPTHS, function(depth) {
plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
priors = "priorValue", usePriorEveryRecurse = FALSE,
itemVarName = "query", depth = depths_param_data_ck$depth[depth + 1], alpha = depths_param_data_ck$alpha[depth + 1]) %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth) %>%
filter(tuningData == "keep")
}))
print("Common knowledge priors performance for recursive depth 0-5 with fitted alphas")
## [1] "Common knowledge priors performance for recursive depth 0-5 with fitted alphas"
maxCors_ck ## performance data
## # A tibble: 6 × 3
## depth maxCor alpha
## <dbl> <dbl> <dbl>
## 1 0 0.7357369 0.0
## 2 1 0.9453030 3.1
## 3 2 0.9180203 2.0
## 4 3 0.9138164 3.3
## 5 4 0.9087468 3.0
## 6 5 0.9007427 2.9
## plots
ggplot(best_ck_prior_models_fitted_alpha, aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth, labeller = label_both) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("Common knowledge priors model performance for recursive depth 0-5 with fitted alphas") + theme_bw()
sims_NoPrior_temp <- lapply(DEPTHS, function(depth) {
plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
itemVarName = "query", depth = depth) %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth) %>%
filter(tuningData == "keep")
})
sims_NoPrior <- convertListToDF(sims_NoPrior_temp)
sims_NoPrior %>%
filter(depth < 3) %>%
ggplot(., aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) + theme_bw()
ggplot(sims_NoPrior, aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2)+ theme_bw()
print("No priors models, alpha held at 1, varied depth")
## [1] "No priors models, alpha held at 1, varied depth"
cors_noPriors <- sims_NoPrior %>%
group_by(depth) %>%
summarise("cor" = cor(p, preds)) %>%
mutate("alpha" = 1)
cors_noPriors %>%
kable()
| depth | cor | alpha |
|---|---|---|
| 0 | 0.7357369 | 1 |
| 1 | 0.9425637 | 1 |
| 2 | 0.9505864 | 1 |
| 3 | 0.9474159 | 1 |
| 4 | 0.9436402 | 1 |
| 5 | 0.9404205 | 1 |
## Check that we have fullData and compareIndices for tuning
if (!(inGlobalEnv("fullData") & inGlobalEnv("compareIndices"))) {
fullData <- fullData %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices <- which(fullData$tuningData == "keep")
}
## Parallelize fitting
noPriorFit <- mclapply(DEPTHS, function(i) { ## parallelize over depths
rsa.tuneDepthAlpha(fullData, quantityVarName = "object",
semanticsVarName = "speaker.p", itemVarName = "query",
compareIndices = compareIndices, compareDataName = "p",
groupName = "grouper",
depth = i, alphas = ALPHAS)
},
mc.cores = detectCores())
aggregateTuneData_noPriors <- convertListToDF(noPriorFit)
## Get best model performance by alpha, depth
maxCors_noPriors <- aggregateTuneData_noPriors %>%
group_by(depth) %>%
summarize(maxCor = max(cor),
alpha = alpha[which.max(cor)])
## Plot tuning
ggplot(aggregateTuneData_noPriors, aes(x = alpha, y = cor, col = as.factor(depth))) +
geom_point() +
ylim(0.75, 1) +
geom_vline(xintercept = maxCors_noPriors$alpha, lty = 4, col = "grey50") +
scale_x_continuous(breaks = round(seq(1, 6, by = 0.1)), "alpha") +
ggtitle("Model performance - no priors") + theme_bw()
depths_param_data <- maxCors_noPriors %>%
arrange(depth)
best_noPrior_models_fitted_alpha <- convertListToDF(lapply(DEPTHS, function(depth) { ## parallelize over depths
plyr::ddply(fullData, .variables = c("grouper"), rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
usePriorEveryRecurse = FALSE, itemVarName = "query",
depth = depths_param_data$depth[depth + 1], alpha = depths_param_data$alpha[depth + 1]) %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth) %>%
filter(tuningData == "keep")
}))
## Quick check
## cor(best_noPrior_models_fitted_alpha[best_noPrior_models_fitted_alpha$depth == 1, ]$p, best_noPrior_models_fitted_alpha[best_noPrior_models_fitted_alpha$depth == 1, ]$preds)
print("Model performance for recursive depth 0-5 with fitted alphas no priors")
## [1] "Model performance for recursive depth 0-5 with fitted alphas no priors"
maxCors_noPriors ## performance data
## # A tibble: 6 × 3
## depth maxCor alpha
## <dbl> <dbl> <dbl>
## 1 0 0.7357369 0.0
## 2 1 0.9457362 1.5
## 3 2 0.9513002 0.9
## 4 3 0.9520095 0.8
## 5 4 0.9520683 0.7
## 6 5 0.9521724 0.7
## plots
ggplot(best_noPrior_models_fitted_alpha, aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~depth, labeller = label_both) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("No-prior model performance for recursive depth 0-5 with fitted alphas") + theme_bw()
Combine the fitted-alpha data across models.
no_fit_alphas_all_models <- rbind(
cors_pk %>% mutate(prior_type="pk"),
cors_ck %>% mutate(prior_type="ck"),
cors_noPriors %>% mutate(prior_type="none")
)
fit_alphas_all_models <- rbind(
maxCors_pk %>% mutate(prior_type="pk"),
maxCors_ck %>% mutate(prior_type="ck"),
maxCors_noPriors %>% mutate(prior_type="none")
)
all_cors_d <- merge(no_fit_alphas_all_models[,which(names(no_fit_alphas_all_models) != "alpha")], fit_alphas_all_models) %>%
gather(type, r, c(cor, maxCor)) %>%
mutate(alpha=ifelse(type=="cor", 1.0, alpha))
ggplot(all_cors_d, aes(x=as.numeric(depth), y=r, col=prior_type)) +
geom_line() +
facet_wrap(~type) +
theme_bw()
ggplot(all_cors_d, aes(x=as.numeric(depth), y=r, col=prior_type)) +
geom_point(aes(size=alpha), alpha=0.4) +
facet_wrap(~type) +
theme_bw()
merge(no_fit_alphas_all_models[,which(names(no_fit_alphas_all_models) != "alpha")], fit_alphas_all_models) %>%
rename(fitted_alpha=alpha,
unfitted_cor=cor,
fitted_cor=maxCor) %>%
kable()
| depth | prior_type | unfitted_cor | fitted_cor | fitted_alpha |
|---|---|---|---|---|
| 0 | ck | 0.7357369 | 0.7357369 | 0.0 |
| 0 | none | 0.7357369 | 0.7357369 | 0.0 |
| 0 | pk | 0.7357369 | 0.7357369 | 0.0 |
| 1 | ck | 0.8581523 | 0.9453030 | 3.1 |
| 1 | none | 0.9425637 | 0.9457362 | 1.5 |
| 1 | pk | 0.8581523 | 0.9453030 | 3.1 |
| 2 | ck | 0.8687076 | 0.9180203 | 2.0 |
| 2 | none | 0.9505864 | 0.9513002 | 0.9 |
| 2 | pk | 0.9281867 | 0.9522229 | 1.4 |
| 3 | ck | 0.8720136 | 0.9138164 | 3.3 |
| 3 | none | 0.9474159 | 0.9520095 | 0.8 |
| 3 | pk | 0.9472652 | 0.9527394 | 1.1 |
| 4 | ck | 0.8726817 | 0.9087468 | 3.0 |
| 4 | none | 0.9436402 | 0.9520683 | 0.7 |
| 4 | pk | 0.9523711 | 0.9523711 | 1.0 |
| 5 | ck | 0.8723590 | 0.9007427 | 2.9 |
| 5 | none | 0.9404205 | 0.9521724 | 0.7 |
| 5 | pk | 0.9528916 | 0.9528916 | 1.0 |
It seems like the Common Knowledge prior underperforms, except with a single level of recursion. Of course, the results in that case are identical to the Private Knowledge prior, because the prior is only included once…
## combine priors and uniform
sims_prior <- sims_prior_pk %>%
mutate(prior = "prior")
sims_NoPrior <- sims_NoPrior %>%
mutate(prior = "uniform")
allData <- rbind(sims_prior, sims_NoPrior) %>%
mutate(model = paste0("L", depth)) %>%
arrange(prior, query, object, matrix, cond, expt, model)
## Compare with MFs data
mfD <- read.csv("data/modelRuns.csv") %>%
arrange(prior, query, object, matrix, cond, expt, model)
## Add in MF preds check correlation
allData$mdPreds <- mfD$prediction
ggplot(allData, aes(x = preds, y = mdPreds, fill = matrix, size = depth)) +
xlab("rrrsa predictions") +
ylab("MF predictions") +
geom_point(shape = 21, alpha = 0.30) + theme_bw()
# ggsave("pragmods.png", path = "~/Desktop/", width=7, height=3)
Helpers - calculating bootstrapped CIs
#####
##### se()
##### ----------------------
##### given a numeric vector of bootstrap estimates, calculate the std error :: sqrt(var(v))
#####
se <- function(v) {
sqrt(var(v))
}
#####
##### ci()
##### ----------------------
##### given a mean, sd, and n, calculate a normal-approximation confidence interval of width `width`
#####
ci <- function(mu, sd, n, width = 0.95) {
interval <- (1 - width) / 2
error <- (qnorm(1 - interval) * sd) / sqrt(n)
return(c("bstrp_mu" = mu, "bstrp_err" = error))
}
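For example, with illustrative numbers (bootstrap mean 0.5, standard deviation 0.1, 100 estimates), the 95% half-width is qnorm(0.975) * 0.1 / sqrt(100), roughly 0.0196:
## Illustrative numbers only
ci(mu = 0.5, sd = 0.1, n = 100) ## c(bstrp_mu = 0.5, bstrp_err = 0.0196), approximately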
#####
##### get_se()
##### ----------------------
##### given a df of bootstrapped estimates return std error
#####
get_se <- function(boot_ests) {
apply(as.matrix(boot_ests), 1, se)
}
#####
##### get_ci()
##### ----------------------
##### given a df of bootstrapped estimates return `width` confidence intervals
#####
get_ci <- function(boot_ests, width = 0.95) {
apply(as.matrix(boot_ests), 1, FUN = function(ests) {
mu <- mean(ests)
sd <- sd(ests)
n <- length(ests)
return(ci(mu, sd, n, width))
})
}
#####
##### boot_vec()
##### -------------
##### multinomial bootstrap
##### given a vector of counts, return n bootstrap
##### estimates of the proportions
#####
boot_vec <- function(countsVec, n = 1) {
t(normalizeBy(rmultinom(n, sum(countsVec), countsVec), rows = FALSE))
}
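A toy call of boot_vec() (illustrative counts): three multinomial resamples of 47 participants split 2/6/39 come back as a 3 x 3 matrix whose rows are bootstrapped proportion vectors, each summing to 1.
## Toy example only -- not part of the analysis
boot_vec(c(foil_cnts = 2, logical_cnts = 6, target_cnts = 39), n = 3)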
#####
##### pragmods_priors_boot()
##### ----------------------
##### custom pragmods wrapper around boot_vec()
##### used with mapply() to create new prior estimates
#####
pragmods_priors_boot <- function(c1, c2, c3, n = 1) {
boot_vec(c("foil_cnts" = c1, "logical_cnts" = c2, "target_cnts" = c3), n)
}
#####
##### add_booted_priors()
##### ----------------------
##### given a df with prior counts for 'foil', 'logical' and 'target'
##### return new df with n = 1 bootstrap estimates of priors
##### NOTE :: like pragmods_priors_boot this is not a generic function -
##### assumes naming conventions established here ('foil_cnts', 'logical_cnts', 'target_cnts')
#####
add_booted_priors <- function(df) {
res <- convertListToDF(with(df, mapply(pragmods_priors_boot, foil_cnts, logical_cnts, target_cnts, SIMPLIFY = FALSE)))
names(res) <- c("foil_bt", "logical_bt", "target_bt")
cbind(df, res)
}
#####
##### run_boot_sims()
##### ---------------------
##### Run `n_sims` simulations with `alpha`, `depth`
##### Expects a df `d_prior_counts` that contains prior counts data
#####
run_boot_sims <- function(d_prior_counts, alpha = 1, depth = 1, n_sims = 100, verbose = TRUE) {
## Display run
if (verbose) {
cat(paste("Running ", n_sims, " simulations with alpha = ", alpha, ""))
}
## Store n_sims bootstrapped simulations
bstrap_sims <- mclapply(seq(1, n_sims), FUN = function(i) {
## Convert to rrrsa df
d_booted_priors <-
rrrsa_ready_df(add_booted_priors(d_prior_counts),
foil_name = "foil_bt", logical_name = "logical_bt", target_name = "target_bt")
## Run rrrsa
res <- plyr::ddply(d_booted_priors, .variables = c("grouper"), rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
itemVarName = "query", alpha = alpha, depth = depth,
priorsVarName = "priorValue", usePriorEveryRecurse = FALSE) %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth) %>%
filter(tuningData == "keep")
## Return predictions
res$preds
},
mc.cores = detectCores())
## Convert simulations to df
pred_sims <- convertListToDF(bstrap_sims, rows = FALSE)
pred_sims
}
## Get prior counts data (from sims.R)
priorCounts <- read.csv("data/prior_counts.csv")
names(priorCounts)[1:3] <- c("foil_cnts", "logical_cnts", "target_cnts")
## Merge with our original data set
d_prior_counts <- merge(d, priorCounts, by = "prior", all = TRUE)
Let’s look at the bootstrapped estimates at a single level of recursion. These next two code blocks are useful for verifying that we’re measuring the correct quantities.
depth <- 4
d5_bstrap_sims <- run_boot_sims(d_prior_counts, depth = depth, n_sims = 100, verbose = FALSE)
d5_bstrap_sims$simNum <- seq(1, nrow(d5_bstrap_sims))
d5_bstrap_sims_long <- d5_bstrap_sims %>%
gather(itNum, val, -simNum) %>%
select(simNum, val)
## How do the sims look (approximately normally distributed)?
d5_bstrap_sims_long %>%
filter(val != 1 & val != 0 & val != 0.5) %>%
ggplot(aes(val, fill = simNum)) +
geom_density() +
facet_wrap(~simNum) + theme_bw()
d5_prior_model_sims <- sims_prior[which(sims_prior$depth == depth), ]
errs <- data.frame()
for (sim_row in seq(1, nrow(d5_bstrap_sims))) {
## Get diff with our estimate
diffs <- d5_prior_model_sims$preds[sim_row] - unlist(d5_bstrap_sims[sim_row, ])
errs <- rbind(errs, diffs)
}
cis <- data.frame(t(apply(errs, MARGIN = 1, FUN = quantile, c(0.05, 0.95))))
names(cis) <- c('low', 'hi')
d5_bstrap_cis <- cbind(d5_prior_model_sims, cis)
## Turn the error quantiles back into bounds on the predictions
d5_bstrap_cis <- d5_bstrap_cis %>%
mutate(ci_low = preds - hi,
ci_hi = preds - low)
ggplot(d5_bstrap_cis, aes(x = preds, y = p, col = expt, pch = object)) +
geom_errorbarh(aes(xmin = ci_hi, xmax = ci_low, height = 0.05)) +
geom_errorbar(aes(ymin = cil, ymax = cih, width = 0.05)) +
xlim(c(0,1)) + ylim(c(0,1)) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
facet_wrap(~depth) +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle(paste0("Priors model, depth = ", as.character(depth), ", bootatrapped 95% CIs")) + theme_bw()
## Run simulations for each depth
sims_prior_bstrap_errs <- read.csv("data/sims_prior_bstrap_errs.csv") ## From previous run with set.seed(1)
## Run below code if we don't have saved previous sims
## ---------------------------------------------------
if (!inGlobalEnv("sims_prior_bstrap_errs")) {
depth_sims <- mclapply(DEPTHS, FUN = function(depth) {
run_boot_sims(d_prior_counts, depth = depth, n_sims = 500, verbose = FALSE)
}, mc.cores = detectCores())
## For each simulation level
cis <- data.frame() ## df to hold CIs
# fn <- function() {
for (depth in seq(1, length(depth_sims))) {
## Current depth data
curr_data <- data.frame(depth_sims[depth])
errs <- data.frame() ## Hold ci diffs
## For each simulation (row) in the current data frame
for (sim_row in seq(1, nrow(curr_data))) {
## Get diff with our estimate
diffs <- sims_prior[which(sims_prior$depth == depth - 1), ]$preds[sim_row] - unlist(curr_data[sim_row, ])
errs <- rbind(errs, diffs)
}
cis <- rbind(cis, data.frame(t(apply(errs, MARGIN = 1, FUN = quantile, c(0.05, 0.95)))))
}
# }
# debug(fn)
# undebug(fn)
# cis <- fn()
names(cis) <- c('low', 'hi')
sims_prior_bstrap_errs <- cbind(sims_prior, cis)
## Turn the error quantiles back into bounds on the predictions
sims_prior_bstrap_errs <- sims_prior_bstrap_errs %>%
mutate(ci_low = preds - hi,
ci_hi = preds - low)
}
# write.csv(sims_prior_bstrap_errs, "sims_prior_bstrap_errs.csv")
ggplot(sims_prior_bstrap_errs, aes(x = preds, y = p, col = expt, pch = object)) +
geom_errorbarh(aes(xmin = ci_hi, xmax = ci_low, height = 0.05)) +
geom_errorbar(aes(ymin = cil, ymax = cih, width = 0.05)) +
xlim(c(0,1)) + ylim(c(0,1)) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
facet_wrap(~depth) +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("Priors model, bootstrapped (500 sims) 95% CIs") + theme_bw()
sims_prior_bstrap_errs %>%
mutate(ci_width = hi - low,
type = paste0(cond, "-", expt)) %>%
ggplot(aes(x = depth, y = ci_width, col = type)) +
geom_point() + theme_bw()
depth_tests <- c("complex", "oddman", "simple", "twins")
priors_tests <- c("baserate", "color", "lang")
full_data_type <- fullData %>%
mutate(type = ifelse(expt %in% depth_tests, "depth_test", "priors_test"))
full_data_priors_tests <- full_data_type %>% filter(type == "priors_test")
full_data_depth_tests <- full_data_type %>% filter(type == "depth_test")
full_data_priors_tests <- full_data_priors_tests %>% arrange(grouper)
full_data_priors_tests <- full_data_priors_tests %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices <- which(full_data_priors_tests$tuningData == "keep")
prior_d <- plyr::ddply(full_data_priors_tests, .variables = c("grouper"), .fun = rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
itemVarName = "query", priorsVarName = "priorValue",
usePriorEveryRecurse = FALSE) %>%
filter(tuningData == "keep")
noPrior_d <- plyr::ddply(full_data_priors_tests, .variables = c("grouper"), .fun = rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
itemVarName = "query",
usePriorEveryRecurse = FALSE) %>%
filter(tuningData == "keep")
join_priors_noPriors <- cbind(prior_d, noPrior_d$preds)
names(join_priors_noPriors)[length(names(join_priors_noPriors))] <- 'noPrior_preds'
join_priors_noPriors <- join_priors_noPriors %>%
gather(prior_type, value, c(preds, noPrior_preds))
ggplot(join_priors_noPriors, aes(x = value, y = p, col = prior_type)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(~prior) +
ylab("Proportion choosing target") +
xlab("Model Predictions") + theme_bw()
# geom_smooth(method="lm", aes(group=1)) +
# geom_abline(slope = 1, intercept = 0, lty=2) +
# ggtitle("Model performance for recursive depth 0-5 with fitted alphas")
## Parallelize fitting
priorFit <- mclapply(DEPTHS, function(i) { ## parallelize over depths
rsa.tuneDepthAlpha(full_data_priors_tests, quantityVarName = "object",
semanticsVarName = "speaker.p", itemVarName = "query",
priorsVarName = "priorValue", groupName = "grouper",
compareDataName = "p", compareIndices = compareIndices,
depth = i, alphas = ALPHAS, usePriorEveryRecurse = FALSE)
},
mc.cores = detectCores())
aggregateTuneData_priors <- convertListToDF(priorFit)
aggregateTuneData_priors %>%
group_by(depth) %>%
summarize(maxCor = max(cor),
alpha = alpha[which.max(cor)])
## # A tibble: 6 × 3
## depth maxCor alpha
## <dbl> <dbl> <dbl>
## 1 0 0.6312373 0.0
## 2 1 0.9470066 3.8
## 3 2 0.9469632 1.6
## 4 3 0.9465052 1.3
## 5 4 0.9469024 1.1
## 6 5 0.9459132 1.1
noPriorFit <- mclapply(DEPTHS, function(i) { ## parallelize over depths
rsa.tuneDepthAlpha(full_data_priors_tests, quantityVarName = "object",
semanticsVarName = "speaker.p", itemVarName = "query",
groupName = "grouper",
compareDataName = "p", compareIndices = compareIndices,
depth = i, alphas = ALPHAS, usePriorEveryRecurse = FALSE)
},
mc.cores = detectCores())
aggregateTuneData_noPriors <- convertListToDF(noPriorFit)
aggregateTuneData_noPriors %>%
group_by(depth) %>%
summarize(maxCor = max(cor),
alpha = alpha[which.max(cor)])
## # A tibble: 6 × 3
## depth maxCor alpha
## <dbl> <dbl> <dbl>
## 1 0 0.6312373 0.0
## 2 1 0.9489123 1.9
## 3 2 0.9488210 1.0
## 4 3 0.9488951 0.8
## 5 4 0.9488517 0.8
## 6 5 0.9487172 0.8
combined_tuning <- left_join(aggregateTuneData_priors, aggregateTuneData_noPriors, by = c("depth", "alpha"))
names(combined_tuning) <- c("priors", "depth", "alpha", "noPriors")
combined_tuning <- combined_tuning %>%
gather(type, correlation, -c(depth, alpha))
ggplot(combined_tuning, aes(x = alpha, y = correlation, col = as.factor(type))) +
geom_point() +
facet_wrap(~depth) +
ggtitle("Impact of priors on model preds with and without priors") + theme_bw()
full_data_depth_tests <- full_data_depth_tests %>% arrange(grouper)
full_data_depth_tests <- full_data_depth_tests %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"))
keepIndices <- c("keep", "throwout")
compareIndices <- which(full_data_depth_tests$tuningData == "keep")
sims_prior_temp <- lapply(seq(0, 2), function(depth) {
plyr::ddply(full_data_depth_tests, .variables = c("grouper"), rsa.runDf,
quantityVarName = "object", semanticsVarName = "speaker.p",
itemVarName = "query", priorsVarName = "priorValue", depth = depth, usePriorEveryRecurse = FALSE) %>%
rowwise %>%
mutate(tuningData = ifelse(!is.na(count), "keep", "throwout"),
depth = depth) %>%
filter(tuningData == "keep")
})
sims_prior_temp <- convertListToDF(sims_prior_temp)
sims_prior_temp %>% filter(preds != 0, preds != 1) %>%
ggplot(aes(x = preds, y = p, col = expt, pch = object)) +
geom_pointrange(aes(ymin = cil, ymax = cih)) +
xlim(c(0,1)) + ylim(c(0,1)) +
facet_wrap(expt~depth, labeller = label_both, ncol = 3) +
ylab("Proportion choosing target") +
xlab("Model Predictions") +
geom_smooth(method="lm", aes(group=1)) +
geom_abline(slope = 1, intercept = 0, lty=2) +
ggtitle("Model performance for recursive depth 0-2 with fitted alphas") + theme_bw()
sims_prior_temp %>%
group_by(expt, depth) %>%
summarise(correlation = cor(preds, p))
## Source: local data frame [12 x 3]
## Groups: expt [?]
##
## expt depth correlation
## <fctr> <int> <dbl>
## 1 complex 0 0.8385561
## 2 complex 1 0.9611770
## 3 complex 2 0.9763472
## 4 oddman 0 0.9497125
## 5 oddman 1 0.9763249
## 6 oddman 2 0.9763249
## 7 simple 0 0.9001967
## 8 simple 1 0.9517313
## 9 simple 2 0.9898410
## 10 twins 0 0.9761129
## 11 twins 1 0.9970806
## 12 twins 2 0.9875595
ggplot(allData, aes(x = preds, y = p, col = prior)) +
geom_point(shape = 21, alpha = 0.85) +
facet_grid(model ~ cond) + theme_bw()
allData %>%
filter(grouper == 'baserate-72' | grouper == 'color-47') %>%
ggplot(aes(x = preds, y = p, col = prior, shape = as.factor(object))) +
geom_point(alpha = 0.5, size = 4) +
facet_grid(model ~ grouper) +
geom_abline(slope = 1, intercept = 0, lty=2) + theme_bw()
It almost looks like target and logical could be swapped… But with more recursions, the priors model begins to improve…
temp_d <- sims_prior %>% filter(grouper == 'baserate-72' | grouper == 'color-47') %>%
arrange(cond, expt, matrix, prior, object, count, p)
temp_d$unif_preds <- sims_NoPrior %>% filter(grouper == 'baserate-72' | grouper == 'color-47') %>%
arrange(cond, expt, matrix, prior, object, count, p) %>%
mutate(unif_preds = preds) %>%
select(unif_preds) %>%
unlist()
temp_d <- temp_d %>%
mutate(delta = preds - unif_preds) %>%
arrange(-abs(delta))
## uniform vs priors
ggplot(temp_d, aes(x = unif_preds, y = preds, col = factor(grouper), size = factor(depth), shape = as.factor(object))) +
geom_point() +
geom_abline(slope = 1, intercept = 0, lty=2) + theme_bw()
## Take a look at the data
temp_d %>% kable()
| cond | expt | matrix | prior | query | object | count | p | n | cil | cih | priorType | priorValue | grouper | speaker.p | preds | tuningData | depth | unif_preds | delta |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| foil | color | simple | prior | glasses | logical | 6 | 0.1276596 | 47 | 0.0474016 | 0.2323648 | logical | 0.5185185 | color-47 | 0.5 | 0.7000000 | keep | 1 | 0.2500000 | 0.4500000 |
| foil | color | simple | prior | glasses | target | 39 | 0.8297872 | 47 | 0.7146994 | 0.9234288 | target | 0.0740741 | color-47 | 0.5 | 0.3000000 | keep | 1 | 0.7500000 | -0.4500000 |
| foil | color | simple | prior | glasses | logical | 6 | 0.1276596 | 47 | 0.0474016 | 0.2323648 | logical | 0.5185185 | color-47 | 0.5 | 0.5833333 | keep | 2 | 0.1666667 | 0.4166667 |
| foil | color | simple | prior | glasses | target | 39 | 0.8297872 | 47 | 0.7146994 | 0.9234288 | target | 0.0740741 | color-47 | 0.5 | 0.4166667 | keep | 2 | 0.8333333 | -0.4166667 |
| foil | color | simple | prior | glasses | logical | 6 | 0.1276596 | 47 | 0.0474016 | 0.2323648 | logical | 0.5185185 | color-47 | 0.5 | 0.5000000 | keep | 3 | 0.1250000 | 0.3750000 |
| foil | color | simple | prior | glasses | target | 39 | 0.8297872 | 47 | 0.7146994 | 0.9234288 | target | 0.0740741 | color-47 | 0.5 | 0.5000000 | keep | 3 | 0.8750000 | -0.3750000 |
| foil | color | simple | prior | glasses | logical | 6 | 0.1276596 | 47 | 0.0474016 | 0.2323648 | logical | 0.5185185 | color-47 | 0.5 | 0.4375000 | keep | 4 | 0.1000000 | 0.3375000 |
| foil | color | simple | prior | glasses | target | 39 | 0.8297872 | 47 | 0.7146994 | 0.9234288 | target | 0.0740741 | color-47 | 0.5 | 0.5625000 | keep | 4 | 0.9000000 | -0.3375000 |
| 0.11 | baserate | simple | prior | glasses | logical | 12 | 0.1666667 | 72 | 0.0893488 | 0.2582384 | logical | 0.7735849 | baserate-72 | 0.5 | 0.5774648 | keep | 1 | 0.2500000 | 0.3274648 |
| 0.11 | baserate | simple | prior | glasses | target | 54 | 0.7500000 | 72 | 0.6464264 | 0.8429448 | target | 0.1886792 | baserate-72 | 0.5 | 0.4225352 | keep | 1 | 0.7500000 | -0.3274648 |
| foil | color | simple | prior | glasses | target | 39 | 0.8297872 | 47 | 0.7146994 | 0.9234288 | target | 0.0740741 | color-47 | 0.5 | 0.6111111 | keep | 5 | 0.9166667 | -0.3055556 |
| foil | color | simple | prior | glasses | logical | 6 | 0.1276596 | 47 | 0.0474016 | 0.2323648 | logical | 0.5185185 | color-47 | 0.5 | 0.3888889 | keep | 5 | 0.0833333 | 0.3055556 |
| 0.11 | baserate | simple | prior | glasses | logical | 12 | 0.1666667 | 72 | 0.0893488 | 0.2582384 | logical | 0.7735849 | baserate-72 | 0.5 | 0.4505495 | keep | 2 | 0.1666667 | 0.2838828 |
| 0.11 | baserate | simple | prior | glasses | target | 54 | 0.7500000 | 72 | 0.6464264 | 0.8429448 | target | 0.1886792 | baserate-72 | 0.5 | 0.5494505 | keep | 2 | 0.8333333 | -0.2838828 |
| 0.11 | baserate | simple | prior | glasses | target | 54 | 0.7500000 | 72 | 0.6464264 | 0.8429448 | target | 0.1886792 | baserate-72 | 0.5 | 0.6306306 | keep | 3 | 0.8750000 | -0.2443694 |
| 0.11 | baserate | simple | prior | glasses | logical | 12 | 0.1666667 | 72 | 0.0893488 | 0.2582384 | logical | 0.7735849 | baserate-72 | 0.5 | 0.3693694 | keep | 3 | 0.1250000 | 0.2443694 |
| 0.11 | baserate | simple | prior | glasses | target | 54 | 0.7500000 | 72 | 0.6464264 | 0.8429448 | target | 0.1886792 | baserate-72 | 0.5 | 0.6870229 | keep | 4 | 0.9000000 | -0.2129771 |
| 0.11 | baserate | simple | prior | glasses | logical | 12 | 0.1666667 | 72 | 0.0893488 | 0.2582384 | logical | 0.7735849 | baserate-72 | 0.5 | 0.3129771 | keep | 4 | 0.1000000 | 0.2129771 |
| 0.11 | baserate | simple | prior | glasses | logical | 12 | 0.1666667 | 72 | 0.0893488 | 0.2582384 | logical | 0.7735849 | baserate-72 | 0.5 | 0.2715232 | keep | 5 | 0.0833333 | 0.1881898 |
| 0.11 | baserate | simple | prior | glasses | target | 54 | 0.7500000 | 72 | 0.6464264 | 0.8429448 | target | 0.1886792 | baserate-72 | 0.5 | 0.7284768 | keep | 5 | 0.9166667 | -0.1881898 |
| 0.11 | baserate | simple | prior | glasses | foil | 6 | 0.0833333 | 72 | 0.0300468 | 0.1547736 | foil | 0.0377358 | baserate-72 | 0.0 | 0.0000000 | keep | 0 | 0.0000000 | 0.0000000 |
| 0.11 | baserate | simple | prior | glasses | foil | 6 | 0.0833333 | 72 | 0.0300468 | 0.1547736 | foil | 0.0377358 | baserate-72 | 0.0 | 0.0000000 | keep | 1 | 0.0000000 | 0.0000000 |
| 0.11 | baserate | simple | prior | glasses | foil | 6 | 0.0833333 | 72 | 0.0300468 | 0.1547736 | foil | 0.0377358 | baserate-72 | 0.0 | 0.0000000 | keep | 2 | 0.0000000 | 0.0000000 |
| 0.11 | baserate | simple | prior | glasses | foil | 6 | 0.0833333 | 72 | 0.0300468 | 0.1547736 | foil | 0.0377358 | baserate-72 | 0.0 | 0.0000000 | keep | 3 | 0.0000000 | 0.0000000 |
| 0.11 | baserate | simple | prior | glasses | foil | 6 | 0.0833333 | 72 | 0.0300468 | 0.1547736 | foil | 0.0377358 | baserate-72 | 0.0 | 0.0000000 | keep | 4 | 0.0000000 | 0.0000000 |
| 0.11 | baserate | simple | prior | glasses | foil | 6 | 0.0833333 | 72 | 0.0300468 | 0.1547736 | foil | 0.0377358 | baserate-72 | 0.0 | 0.0000000 | keep | 5 | 0.0000000 | 0.0000000 |
| 0.11 | baserate | simple | prior | glasses | logical | 12 | 0.1666667 | 72 | 0.0893488 | 0.2582384 | logical | 0.7735849 | baserate-72 | 0.5 | 0.5000000 | keep | 0 | 0.5000000 | 0.0000000 |
| 0.11 | baserate | simple | prior | glasses | target | 54 | 0.7500000 | 72 | 0.6464264 | 0.8429448 | target | 0.1886792 | baserate-72 | 0.5 | 0.5000000 | keep | 0 | 0.5000000 | 0.0000000 |
| foil | color | simple | prior | glasses | foil | 2 | 0.0425532 | 47 | 0.0034643 | 0.1142362 | foil | 0.4074074 | color-47 | 0.0 | 0.0000000 | keep | 0 | 0.0000000 | 0.0000000 |
| foil | color | simple | prior | glasses | foil | 2 | 0.0425532 | 47 | 0.0034643 | 0.1142362 | foil | 0.4074074 | color-47 | 0.0 | 0.0000000 | keep | 1 | 0.0000000 | 0.0000000 |
| foil | color | simple | prior | glasses | foil | 2 | 0.0425532 | 47 | 0.0034643 | 0.1142362 | foil | 0.4074074 | color-47 | 0.0 | 0.0000000 | keep | 2 | 0.0000000 | 0.0000000 |
| foil | color | simple | prior | glasses | foil | 2 | 0.0425532 | 47 | 0.0034643 | 0.1142362 | foil | 0.4074074 | color-47 | 0.0 | 0.0000000 | keep | 3 | 0.0000000 | 0.0000000 |
| foil | color | simple | prior | glasses | foil | 2 | 0.0425532 | 47 | 0.0034643 | 0.1142362 | foil | 0.4074074 | color-47 | 0.0 | 0.0000000 | keep | 4 | 0.0000000 | 0.0000000 |
| foil | color | simple | prior | glasses | foil | 2 | 0.0425532 | 47 | 0.0034643 | 0.1142362 | foil | 0.4074074 | color-47 | 0.0 | 0.0000000 | keep | 5 | 0.0000000 | 0.0000000 |
| foil | color | simple | prior | glasses | logical | 6 | 0.1276596 | 47 | 0.0474016 | 0.2323648 | logical | 0.5185185 | color-47 | 0.5 | 0.5000000 | keep | 0 | 0.5000000 | 0.0000000 |
| foil | color | simple | prior | glasses | target | 39 | 0.8297872 | 47 | 0.7146994 | 0.9234288 | target | 0.0740741 | color-47 | 0.5 | 0.5000000 | keep | 0 | 0.5000000 | 0.0000000 |
Still looks like these might have been swapped…
allData %>%
filter(grouper == 'baserate-72' | grouper == 'color-47') %>%
mutate(delta = p - preds) %>%
arrange(delta, model, grouper) %>%
select(cond, delta, p, preds, grouper, query, object, model, priorType, priorValue)
Save this for future generations of rsa pirates
############################################################################
#### SAVE - ORIGINAL DATA PROCESSING
####
processedD <- d %>%
## Gather priors - we're only interested in prior for current object
gather(priorType, priorValue, c(foil, logical, target)) %>%
## Filter out any mismatches between current object and priorType
filter(object == priorType) %>%
rowwise %>%
## Grouping variable concatenates experiment type and sample size (expt-n)
mutate(grouper = paste0(as.character(expt), "-", as.character(n)))
fullData <- plyr::ddply(processedD, .variables = ("grouper"), .fun = completeQueries) %>%
rowwise %>%
## Each row gets a unique literal semantics from the getLiteralSemantics() helper
mutate(speaker.p = getLiteralSemantics(matrix, query = query, object = object)) %>%
arrange(expt, n)
####
#### SAVE - ORIGINAL DATA PROCESSING
############################################################################
# sims_prior %>%
# mutate(errorH = cih - p,
# errorL = p - cil) %>%
# ggplot(., aes(x = preds, y = p, col = expt, pch = object)) +
# geom_errorbarh(aes(xmin = preds - errorL, xmax = preds + errorH)) +
# geom_errorbar(aes(ymin = cil, ymax = cih)) +
# xlim(c(0,1)) + ylim(c(0,1)) +
# facet_wrap(~depth) +
# ylab("Proportion choosing target") +
# xlab("Model Predictions") +
# geom_smooth(method="lm", aes(group=1)) +
# geom_abline(slope = 1, intercept = 0, lty=2)